Set the working directory

# setwd("/home/yourname/NYCTaxiData")

Load data

# Read the taxi-trip sample (sample.csv in the working directory) into a
# data frame, then print its dimensions as a quick sanity check.
mydata <- read.csv("sample.csv")
dim(mydata)
## [1] 100000     21

Install the tmap package (a library for thematic maps) and the other required R packages

#install.packages("dplyr")
#install.packages("sf")
#install.packages("curl")
#Restart your R Session
#install.packages("tmap")

Install ggmap

#install.packages("ggmap")

#OR (choose whichever works on your computer)

#install.packages("devtools")
#devtools::install_github("dkahle/ggmap")

Load libraries

library(dplyr)
library(sf)
library(curl)
library(ggmap)
library(tmap)
library(tmaptools)

Download and view map

# Build the base map: look up the bounding box of "Manhattan" with
# geocode_OSM(), convert it to a one-row numeric matrix, and request the
# matching tiles at zoom level 11. The lookup is wrapped in local() so that
# only `map` is created in the workspace.
# NOTE(review): both calls presumably need network access, and Stamen tile
# hosting may not be available in recent ggmap releases -- confirm against
# the installed ggmap version.
map <- local({
  manhattan_bbox <- geocode_OSM("Manhattan")$bbox
  get_stamenmap(rbind(as.numeric(paste(manhattan_bbox))), zoom = 11)
})

# Display the bare base map.
ggmap(map)

Plot pickup locations

# Overlay every pickup location in `mydata` on the base map as a tiny,
# semi-transparent white point.
ggmap(map) +
  geom_point(
    mapping = aes(x = pickup_longitude, y = pickup_latitude),
    data = mydata,
    colour = "white",
    size = 0.01,
    alpha = 0.5
  )

Square binning

# Square binning: count pickups from `mydata` in rectangular bins
# (bins = 100) drawn over the base map, keep the plot in `plotmap`,
# and display it.
plotmap <- ggmap(map) +
  geom_bin2d(
    mapping = aes(x = pickup_longitude, y = pickup_latitude),
    data = mydata,
    bins = 100
  )
plotmap

Install HexBin

#install.packages("hexbin")

Plot HexBin

# Hexagonal binning: same idea as the square bins, but pickups are counted
# in hexagons (bins = 100). A Cartesian coordinate system is added on top of
# the ggmap plot before the hex layer.
# NOTE(review): presumably coord_cartesian() is needed because geom_hex does
# not work with ggmap's default coordinate system -- confirm.
plotmapHB <- ggmap(map) +
  coord_cartesian() +
  geom_hex(
    mapping = aes(x = pickup_longitude, y = pickup_latitude),
    data = mydata,
    bins = 100
  )
plotmapHB

Load data into Spark with sparklyr

# Attach sparklyr, plus dplyr for the verbs applied to Spark tables below.
library(sparklyr)
library(dplyr)

# Open a connection to a local Spark instance.
sc <- spark_connect(master = "local")

# Read sample.csv (comma-delimited, with a header row) into Spark under the
# table name "taxi_data"; `nyc_taxi` is the R-side reference to that table.
nyc_taxi <- spark_read_csv(
  sc,
  name = "taxi_data",
  path = "sample.csv",
  header = TRUE,
  delimiter = ","
)

Manual square binning by rounding

# Snap each pickup coordinate to 3 decimal places so that nearby pickups
# share identical (latitude, longitude) values, i.e. fall in the same square
# cell. The result is registered in Spark under the name "nyc_taxi".
nyc_taxi <- nyc_taxi %>%
  mutate(
    pickup_latitude = round(pickup_latitude, 3),
    pickup_longitude = round(pickup_longitude, 3)
  ) %>%
  sdf_register("nyc_taxi")

Saving data

# Write the rounded trips out as CSV under "rounded", replacing any
# previous output at that path.
spark_write_csv(
  nyc_taxi,
  path = "rounded",
  header = TRUE,
  delimiter = ",",
  mode = "overwrite"
)

Compute pickup counts per rounded location and save the summary

# Count the trips at each rounded (latitude, longitude) pair; the count is
# stored in column `n`. The result is registered in Spark under the name
# "nyc_taxi_summary".
nyc_taxi_summary <- nyc_taxi %>%
  group_by(pickup_latitude, pickup_longitude) %>%
  summarise(n = n()) %>%
  sdf_register("nyc_taxi_summary")

# Write the per-cell counts out as CSV under "summary", replacing any
# previous output at that path.
spark_write_csv(
  nyc_taxi_summary,
  path = "summary",
  header = TRUE,
  delimiter = ",",
  mode = "overwrite"
)

Plot the summary on the map and save the plot

# Draw one small filled square (shape 22) per rounded pickup location,
# coloured and filled by the pickup count `n` at that location.
# `nyc_taxi_summary` is a Spark table, so it is collected into a local data
# frame explicitly before being handed to ggplot2.
plotmap <- ggmap(map) +
  geom_point(
    mapping = aes(
      x = pickup_longitude,
      y = pickup_latitude,
      colour = n,
      fill = n
    ),
    data = collect(nyc_taxi_summary),
    shape = 22,
    size = 0.25
  )

# Name the plot to save explicitly instead of relying on ggplot2's implicit
# "last plot" state, which can change if another plot is built in between.
ggsave("plot.png", plot = plotmap)